#!/bin/sh

## Positional arguments: $1 = tumor name, $2 = normal name. Together they
## form the "<Tumor>_<Normal>" prefix of every VCF file handled below.
## Fail fast when one is missing instead of building paths such as "__PASS.vcf".
Tumor=${1:?"missing tumor name (argument 1)"}
Normal=${2:?"missing normal name (argument 2)"}

## Directories this script reads from / writes to. They are not defined in
## this file, so they must be provided by the caller's environment; abort
## with a clear message rather than silently resolving paths against "/".
: "${vcf_path:?"vcf_path must be set"}"
: "${ref_path:?"ref_path must be set"}"
: "${tmp_path:?"tmp_path must be set"}"

## Number of variant records in the input PASS VCF. Only lines that START
## with '#' are header lines; a '#' elsewhere in a record must not hide it.
Raw_snp=$(grep -c -v '^#' "${vcf_path}/${Tumor}_${Normal}_PASS.vcf")
#############################################################################################################################QC
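## Remove variants that fall in segmental-duplication or simple-repeat regions.
## The *.use.bed files used below can be generated from the raw tables with the
## following (commented-out) commands, which drop the first line and keep columns 2-4: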
## cat ${ref_path}/SimpleRepeats/GRCh38_SegmentalDups.bed | sed '1d' | awk -F'\t' '{OFS="\t"}{print $2,$3,$4}' > ${ref_path}/SimpleRepeats/GRCh38_SegmentalDups.use.bed
## cat ${ref_path}/SimpleRepeats/GRCh38_SimpleRepeats.bed | sed '1d' | awk -F'\t' '{OFS="\t"}{print $2,$3,$4}' > ${ref_path}/SimpleRepeats/GRCh38_SimpleRepeats.use.bed

## Step 1 - SegmentalDups: drop variants overlapping segmental duplications.
## bedtools flags: -v reports only -a records with no overlap in -b;
## -header copies the header of the -a file to the output.
bedtools intersect -v -header \
  -a "${vcf_path}/${Tumor}_${Normal}_PASS.vcf" \
  -b "${ref_path}/SimpleRepeats/GRCh38_SegmentalDups.use.bed" \
  > "${tmp_path}/${Tumor}_${Normal}_QC1.SegmentalDups.vcf"
## Records left after step 1 (header lines are those starting with '#').
SegmentalDups_QC_num=$(grep -c -v '^#' "${tmp_path}/${Tumor}_${Normal}_QC1.SegmentalDups.vcf")

## Step 2 - SimpleRepeats: drop variants overlapping simple repeats,
## starting from the output of step 1.
bedtools intersect -v -header \
  -a "${tmp_path}/${Tumor}_${Normal}_QC1.SegmentalDups.vcf" \
  -b "${ref_path}/SimpleRepeats/GRCh38_SimpleRepeats.use.bed" \
  > "${tmp_path}/${Tumor}_${Normal}_QC1.vcf"
## Records left after step 2.
SimpleRepeats_QC_num=$(grep -c -v '^#' "${tmp_path}/${Tumor}_${Normal}_QC1.vcf")


#############################################################################################################################QC
## Indels whose REF and ALT lengths differ by more than 50 are treated as
## structural variants (SV) and removed.

## filter_sv_by_length MAX_DIFF
##   Reads a VCF on stdin and writes the filtered VCF to stdout.
##   - Header lines (starting with '#') are copied through exactly once.
##   - A record is kept when |length(REF) - length(ALT)| <= MAX_DIFF.
##   NOTE: column 5 (ALT) is measured as one string, so a comma-separated
##   multi-allelic ALT is not split into individual alleles.
filter_sv_by_length() {
  awk -F'\t' -v length_limit="$1" '
    # Headers are handled first and skipped, so they are never re-evaluated
    # by the length test (which they would pass with two empty fields).
    /^#/ { print; next }
    {
      diff = length($4) - length($5)
      if (diff < 0) diff = -diff
      if (diff <= length_limit + 0) print
    }'
}

filter_sv_by_length 50 \
  < "${tmp_path}/${Tumor}_${Normal}_QC1.vcf" \
  > "${tmp_path}/${Tumor}_${Normal}_QC2.vcf"
## Records left after the SV filter.
SV_QC_num=$(grep -c -v '^#' "${tmp_path}/${Tumor}_${Normal}_QC2.vcf")


#############################################################################################################################QC
## Remove variants located on the X and Y chromosomes.

## drop_sex_chromosomes
##   Reads a VCF on stdin and writes the filtered VCF to stdout.
##   - Header lines (starting with '#') are always copied through.
##   - A record is dropped when its chromosome column (field 1) is X or Y,
##     with or without a "chr" prefix, including X/Y-anchored contigs that
##     continue with "_" (e.g. chrY_..._random). Contigs that merely contain
##     the letter X or Y somewhere in their name are kept.
drop_sex_chromosomes() {
  awk -F'\t' '
    /^#/ { print; next }
    $1 !~ /^(chr)?[XY]($|_)/ { print }'
}

drop_sex_chromosomes \
  < "${tmp_path}/${Tumor}_${Normal}_QC2.vcf" \
  > "${tmp_path}/${Tumor}_${Normal}_QC3.vcf"

## Records left after removing the sex chromosomes.
sex_chr_QC_num=$(grep -c -v '^#' "${tmp_path}/${Tumor}_${Normal}_QC3.vcf")

#############################################################################################################################QC
## Summary: append one CSV row to Vcf_QC.list with the columns
##   Tumor, raw count, after SegmentalDups, after SimpleRepeats,
##   after SV-length filter, after sex-chromosome removal.
## printf with quoted arguments keeps each value as a single field (no word
## splitting or glob expansion of the values).
printf '%s,%s,%s,%s,%s,%s\n' \
  "$Tumor" "$Raw_snp" "$SegmentalDups_QC_num" "$SimpleRepeats_QC_num" \
  "$SV_QC_num" "$sex_chr_QC_num" \
  >> "${vcf_path}/Vcf_QC.list"

## Publish the final filtered VCF next to the input VCF.
cp "${tmp_path}/${Tumor}_${Normal}_QC3.vcf" "${vcf_path}/${Tumor}_${Normal}_QC.vcf"
